{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Python integration (for use in Jupyter Notebooks)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Please install the requirements using:\n",
    "    \n",
    "```bash\n",
    "pip install pandas pydataset rpy2\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load `rpy2` extension:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "%load_ext rpy2.ipython"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the package (if not already installed):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "if (!require(devtools, quietly=T)) install.packages(\"devtools\")\n",
    "if (!require(ComplexUpset, quietly=T)) devtools::install_github(\"krassowski/complex-upset\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load ggplot2 and ComplexUpset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "library(ggplot2)\n",
    "library(ComplexUpset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare the datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>48</th>\n",
       "      <th>112</th>\n",
       "      <th>124</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>title</th>\n",
       "      <td>'Til There Was You</td>\n",
       "      <td>10 Things I Hate About You</td>\n",
       "      <td>100 Mile Rule</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>year</th>\n",
       "      <td>1997</td>\n",
       "      <td>1999</td>\n",
       "      <td>2002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>length</th>\n",
       "      <td>113</td>\n",
       "      <td>97</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>budget</th>\n",
       "      <td>2.3e+07</td>\n",
       "      <td>1.6e+07</td>\n",
       "      <td>1.1e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>rating</th>\n",
       "      <td>4.8</td>\n",
       "      <td>6.7</td>\n",
       "      <td>5.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>votes</th>\n",
       "      <td>799</td>\n",
       "      <td>19095</td>\n",
       "      <td>181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r1</th>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r2</th>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r3</th>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r4</th>\n",
       "      <td>14.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r5</th>\n",
       "      <td>14.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>14.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r6</th>\n",
       "      <td>14.5</td>\n",
       "      <td>14.5</td>\n",
       "      <td>24.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r7</th>\n",
       "      <td>14.5</td>\n",
       "      <td>24.5</td>\n",
       "      <td>14.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r8</th>\n",
       "      <td>4.5</td>\n",
       "      <td>14.5</td>\n",
       "      <td>14.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r9</th>\n",
       "      <td>4.5</td>\n",
       "      <td>14.5</td>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r10</th>\n",
       "      <td>14.5</td>\n",
       "      <td>14.5</td>\n",
       "      <td>14.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mpaa</th>\n",
       "      <td>PG-13</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>R</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Action</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Animation</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Comedy</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Drama</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Documentary</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Romance</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Short</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            48                          112            124\n",
       "title        'Til There Was You  10 Things I Hate About You  100 Mile Rule\n",
       "year                       1997                        1999           2002\n",
       "length                      113                          97             98\n",
       "budget                  2.3e+07                     1.6e+07        1.1e+06\n",
       "rating                      4.8                         6.7            5.6\n",
       "votes                       799                       19095            181\n",
       "r1                          4.5                         4.5            4.5\n",
       "r2                          4.5                         4.5            4.5\n",
       "r3                          4.5                         4.5            4.5\n",
       "r4                         14.5                         4.5            4.5\n",
       "r5                         14.5                         4.5           14.5\n",
       "r6                         14.5                        14.5           24.5\n",
       "r7                         14.5                        24.5           14.5\n",
       "r8                          4.5                        14.5           14.5\n",
       "r9                          4.5                        14.5            4.5\n",
       "r10                        14.5                        14.5           14.5\n",
       "mpaa                      PG-13                       PG-13              R\n",
       "Action                        0                           0              0\n",
       "Animation                     0                           0              0\n",
       "Comedy                        1                           1              1\n",
       "Drama                         0                           0              0\n",
       "Documentary                   0                           0              0\n",
       "Romance                       1                           1              0\n",
       "Short                         0                           0              0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pydataset import data as load_data\n",
    "movies = load_data('movies').dropna()\n",
    "movies.head(3).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Action', 'Animation', 'Comedy', 'Drama', 'Documentary', 'Romance', 'Short']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "genres = list(movies.columns[-7:])\n",
    "genres"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert the genre indicator columns to use boolean values:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>48</th>\n",
       "      <th>112</th>\n",
       "      <th>124</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Action</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Animation</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Comedy</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Drama</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Documentary</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Romance</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Short</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               48     112    124\n",
       "Action       False  False  False\n",
       "Animation    False  False  False\n",
       "Comedy        True   True   True\n",
       "Drama        False  False  False\n",
       "Documentary  False  False  False\n",
       "Romance       True   True  False\n",
       "Short        False  False  False"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies[genres] = movies[genres] == 1\n",
    "movies[genres].head(3).T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Quick notes on `rpy2` `%%R` magic usage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- use `-i` switch to import data into R using rpy2 interface\n",
    "- `-w` and `-h` can be used to adjust the width and height of the plot\n",
    "- `-r` switch can be used to adjust DPI"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import the Python data frame to R"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "%R -i movies -i genres"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
